# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import List, Optional
from typing_extensions import Literal

from ..model import Model
from ..._models import BaseModel
from .beta_usage import BetaUsage
from .beta_container import BetaContainer
from .beta_stop_reason import BetaStopReason
from .beta_content_block import BetaContentBlock, BetaContentBlock as BetaContentBlock
from .beta_context_management_response import BetaContextManagementResponse

# Public names of this module: only the `BetaMessage` model is exported.
__all__ = ["BetaMessage"]


# An assistant message: the `content` blocks generated by the model together
# with the metadata describing the generation (`model`, `stop_reason`,
# `usage`, ...).
#
# `id`, `content`, `model`, `role`, `type` and `usage` are required; the
# remaining fields default to `None`.
#
# NOTE: this overview is a comment rather than a class docstring so that
# `BetaMessage.__doc__` stays exactly as it was.
class BetaMessage(BaseModel):
    id: str
    """Unique object identifier.

    The format and length of IDs may change over time.
    """

    container: Optional[BetaContainer] = None
    """
    Information about the container used in the request (for the code execution
    tool).
    """

    content: List[BetaContentBlock]
    """Content generated by the model.

    This is an array of content blocks, each of which has a `type` that determines
    its shape.

    Example:

    ```json
    [{ "type": "text", "text": "Hi, I'm Claude." }]
    ```

    If the request input `messages` ended with an `assistant` turn, then the
    response `content` will continue directly from that last turn. You can use this
    to constrain the model's output.

    For example, if the input `messages` were:

    ```json
    [
      {
        "role": "user",
        "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
      },
      { "role": "assistant", "content": "The best answer is (" }
    ]
    ```

    Then the response `content` might be:

    ```json
    [{ "type": "text", "text": "B)" }]
    ```
    """

    context_management: Optional[BetaContextManagementResponse] = None
    """Context management response.

    Information about context management strategies applied during the request.
    """

    model: Model
    """The model that will complete your prompt.

    See [models](https://docs.anthropic.com/en/docs/models-overview) for
    additional details and options.
    """

    role: Literal["assistant"]
    """Conversational role of the generated message.

    This will always be `"assistant"`.
    """

    stop_reason: Optional[BetaStopReason] = None
    """The reason that we stopped.

    This may be one of the following values:

    - `"end_turn"`: the model reached a natural stopping point
    - `"max_tokens"`: we exceeded the requested `max_tokens` or the model's maximum
    - `"stop_sequence"`: one of your provided custom `stop_sequences` was generated
    - `"tool_use"`: the model invoked one or more tools
    - `"pause_turn"`: we paused a long-running turn. You may provide the response
      back as-is in a subsequent request to let the model continue.
    - `"refusal"`: when streaming classifiers intervene to handle potential policy
      violations

    In non-streaming mode this value is always non-null. In streaming mode, it is
    null in the `message_start` event and non-null otherwise.
    """

    stop_sequence: Optional[str] = None
    """Which custom stop sequence was generated, if any.

    This value will be a non-null string if one of your custom stop sequences was
    generated.
    """

    type: Literal["message"]
    """Object type.

    For Messages, this is always `"message"`.
    """

    usage: BetaUsage
    """Billing and rate-limit usage.

    Anthropic's API bills and rate-limits by token counts, as tokens represent the
    underlying cost to our systems.

    Under the hood, the API transforms requests into a format suitable for the
    model. The model's output then goes through a parsing stage before becoming an
    API response. As a result, the token counts in `usage` will not match one-to-one
    with the exact visible content of an API request or response.

    For example, `output_tokens` will be non-zero, even for an empty string response
    from Claude.

    Total input tokens in a request is the summation of `input_tokens`,
    `cache_creation_input_tokens`, and `cache_read_input_tokens`.
    """
